from urllib.parse import urljoin

import scrapy

from ..items import BossItem


class BossSpider(scrapy.Spider):
    """Crawl job postings from zhipin.com into ``BossItem`` objects.

    Callback chain:

    1. ``parse`` collects the links found under ``div.menu-sub``.
    2. ``parse_info`` requests pages ``1..max_pages`` of each link.
    3. ``parse_info_list`` collects the links found under
       ``div.info-primary`` on each listing page.
    4. ``parse_info_list_details`` scrapes one page into a ``BossItem``.
    """

    name = 'boss'
    start_urls = ['http://www.zhipin.com/']

    # Root that hrefs scraped from the pages are resolved against.
    base_url = 'https://www.zhipin.com'
    # Number of listing pages requested per menu link. ``int()`` is applied
    # on use so a string value (e.g. a ``-a max_pages=5`` spider argument)
    # also works.
    max_pages = 2

    def _follow(self, hrefs, callback):
        """Yield a ``scrapy.Request`` for each href that resolves to HTTP(S).

        Args:
            hrefs: iterable of href strings scraped from a page.
            callback: spider method that handles the resulting response.
        """
        for href in hrefs:
            target = urljoin(self.base_url, href.strip())
            # Pseudo-links such as "javascript:;" do not resolve to a
            # fetchable URL; skip them instead of handing them to Request.
            if not target.startswith(('http://', 'https://')):
                continue
            yield scrapy.Request(target, callback=callback)

    @staticmethod
    def _first(response, query):
        """Return the first match of CSS *query*, or ``''`` if none."""
        return response.css(query).extract_first(default='')

    @staticmethod
    def _at(values, index):
        """Return ``values[index]``, or ``''`` when the index is missing."""
        try:
            return values[index]
        except IndexError:
            return ''

    def parse(self, response):
        """Schedule ``parse_info`` for every link under ``div.menu-sub``."""
        hrefs = response.css('div.menu-sub ul li a::attr(href)').extract()
        return self._follow(hrefs, self.parse_info)

    def parse_info(self, response):
        """Request listing pages ``1..max_pages`` of the current URL."""
        url = response.url
        # Plain concatenation on purpose: the URL may contain percent-escapes,
        # which would break "%"-style string formatting.
        separator = '&' if '?' in url else '?'
        for page in range(1, int(self.max_pages) + 1):
            yield scrapy.Request(
                '{}{}page={}'.format(url, separator, page),
                callback=self.parse_info_list,
            )

    def parse_info_list(self, response):
        """Schedule a detail request for each link under ``div.info-primary``."""
        hrefs = response.css('div.info-primary a::attr(href)').extract()
        return self._follow(hrefs, self.parse_info_list_details)

    def parse_info_list_details(self, response):
        """Scrape one detail page into a ``BossItem``.

        Fields whose node is absent from the page are stored as ``''`` so a
        single missing element does not discard the whole item.
        """
        # Each selector is evaluated once and then indexed defensively.
        primary = response.css('div.info-primary p::text').extract()
        company_info = response.css('div.info-company p::text').extract()

        # The last two text nodes are excluded -- presumably trailing
        # boilerplate; TODO confirm against the live markup.
        detail_parts = response.css('div.job-sec div.text::text').extract()[:-2]

        item = BossItem()
        item["url"] = response.url
        item["jname"] = self._first(response, 'div.name h1::text')
        item["jmoney"] = self._first(response, 'div.name span::text')
        # Only the text after the last full-width colon is kept.
        item["area"] = self._at(primary, 0).split('：')[-1]
        item["experience"] = self._at(primary, 1).split('：')[-1]
        item["education"] = self._at(primary, 2).split('：')[-1]
        item["demand"] = '-'.join(
            response.css('div.job-tags span::text').extract()
        )
        item["company"] = self._first(response, 'h3.name a::text')
        item["frequency"] = self._at(company_info, 0)
        item["numbers"] = self._at(company_info, 1)
        item["industry"] = self._first(response, 'div.info-company p a::text')
        item["recruiters"] = self._first(response, 'div.detail-op h2::text')
        item["detail"] = ''.join(detail_parts).strip()

        yield item
